vignettes/map_visualization.Rmd
map_visualization.Rmdejhu = enriched_jhu_data()
glimpse(ejhu)
## Rows: 86,240
## Columns: 20
## $ name <chr> "Afghanistan", "Afghanistan", "Afghanistan", "Afghanis…
## $ topLevelDomain <list> [".af", ".af", ".af", ".af", ".af", ".af", ".af", ".a…
## $ alpha2Code <chr> "AF", "AF", "AF", "AF", "AF", "AF", "AF", "AF", "AF", …
## $ alpha3Code <chr> "AFG", "AFG", "AFG", "AFG", "AFG", "AFG", "AFG", "AFG"…
## $ capital <chr> "Kabul", "Kabul", "Kabul", "Kabul", "Kabul", "Kabul", …
## $ region <chr> "Asia", "Asia", "Asia", "Asia", "Asia", "Asia", "Asia"…
## $ subregion <chr> "Southern Asia", "Southern Asia", "Southern Asia", "So…
## $ population <int> 27657145, 27657145, 27657145, 27657145, 27657145, 2765…
## $ area <dbl> 652230, 652230, 652230, 652230, 652230, 652230, 652230…
## $ gini <dbl> 27.8, 27.8, 27.8, 27.8, 27.8, 27.8, 27.8, 27.8, 27.8, …
## $ borders <list> [<"IRN", "PAK", "TKM", "UZB", "TJK", "CHN">, <"IRN", …
## $ numericCode <chr> "004", "004", "004", "004", "004", "004", "004", "004"…
## $ cioc <chr> "AFG", "AFG", "AFG", "AFG", "AFG", "AFG", "AFG", "AFG"…
## $ ProvinceState <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ CountryRegion <chr> "Afghanistan", "Afghanistan", "Afghanistan", "Afghanis…
## $ Lat <dbl> 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33…
## $ Long <dbl> 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65…
## $ date <date> 2020-01-22, 2020-01-23, 2020-01-24, 2020-01-25, 2020-…
## $ count <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ subset <chr> "confirmed", "confirmed", "confirmed", "confirmed", "c…
We need a description of the regions of the world.
data(World)
The World object has a column, geometry, that describes the shape of each country in the World dataset. Join the ejhu data.frame with the World data using dplyr join as normal.
w2 = geo_ejhu %>% filter(!is.na(date) & subset=='confirmed') %>% group_by(iso_a3) %>% filter(date==max(date)) %>% mutate(cases_per_million = 1000000*count/pop_est)
The R package ggplot2 has geospatial plotting capabilities built in for geospatial simple features (sf) data types. In this first plot, we focus in on Europe.
library(ggplot2) # transform to lat/long coordinates st_transform(w2, crs=4326) %>% # Crop to europe (rough, by hand) st_crop(xmin=-20,xmax=45,ymin=35,ymax=70) %>% ggplot() + geom_sf(aes(fill=cases_per_million)) + scale_fill_continuous( guide=guide_legend(label.theme = element_text(angle = 90), label.position='bottom') ) + labs(title='Cases per Million Inhabitants') + theme(legend.position='bottom')

Another plot, but now for Africa.
library(ggplot2) # transform to lat/long coordinates st_transform(w2, crs=4326) %>% # Crop to europe (rough, by hand) st_crop(xmin=-20,xmax=50,ymin=-60,ymax=25) %>% ggplot() + geom_sf(aes(fill=cases_per_million)) + scale_fill_continuous( guide=guide_legend(label.theme = element_text(angle = 90), label.position='bottom') ) + labs(title='Cases per Million Inhabitants') + theme(legend.position='bottom')

The following will not produce a plot when run non-interactively. However, pasting this into your R session will result in an interactive plot with multiple “layers” that you can choose to visualize different quantitative variables on the map. Zooming also works as expected.
tmap_mode('view') ## geo_ejhu %>% ## filter(!is.na(date) & subset=='confirmed') %>% ## group_by(iso_a3) %>% ## filter(date==max(date)) %>% ## tm_shape() + ## tm_polygons(col='count') w2 = geo_ejhu %>% filter(!is.na(date) & subset=='confirmed') %>% group_by(iso_a3) %>% filter(date==max(date)) %>% mutate(cases_per_million = 1000000*count/pop_est) %>% filter(region == 'Africa') m = tm_shape(w2,id='name.x', name=c('cases_per_million'),popup=c('pop_est')) + tm_polygons(c('Cases Per Million' = 'cases_per_million','Cases' = 'count',"Well-being index"='well_being', 'GINI'='gini'), selected='cases_per_million', border.alpha = 0.5, alpha=0.6, popup.vars=c('Cases Per Million'='cases_per_million', 'Confirmed Cases' ='count', 'Population' ='pop_est', 'gini' ='gini', 'Life Expectancy' ='life_exp')) + tm_facets(as.layers = TRUE) tmap_save(m, filename='abc.html')

county_geom = tidycensus::county_laea nyt_counties = nytimes_county_data() full_map = county_geom %>% left_join( nyt_counties %>% group_by(fips) %>% filter(date==max(date) & count>0 & subset=='confirmed'), by=c('GEOID'='fips')) %>% mutate(mid=sf::st_centroid(geometry)) z = ggplot(full_map, aes(label=county)) + geom_sf(aes(geometry=geometry),color='grey85') + geom_sf(aes(geometry=mid, size=count, color=count), alpha=0.5, show.legend = "point") + scale_color_gradient2(midpoint=5500, low="lightblue", mid="orange",high="red", space ="Lab" ) + scale_size(range=c(1,10)) ggplotly(z)
United States confirmed cases by County with interactive plotly library. Click and drag to zoom in to a region of interest.
A static plot as a png:
z
United States confirmed cases by County as a static graphic.
Alternatively, produce a PDF of the same plot.
## quartz_off_screen
## 2
library(sars2pack) library(tidycensus) library(dplyr) library(ggplot2) library(sf) nys = nytimes_state_data() %>% dplyr::filter(subset=='confirmed') %>% add_incidence_column(grouping_columns = c('state')) state_pops <- get_acs(geography = "state", variables = "B01003_001", geometry = TRUE) %>% mutate(centroid = st_centroid(geometry))
##
|
| | 0%
|
|= | 1%
|
|= | 2%
|
|== | 2%
|
|== | 3%
|
|=== | 4%
|
|=== | 5%
|
|==== | 5%
|
|==== | 6%
|
|===== | 7%
|
|===== | 8%
|
|====== | 8%
|
|====== | 9%
|
|======= | 10%
|
|======== | 11%
|
|======== | 12%
|
|========= | 12%
|
|========= | 13%
|
|========= | 14%
|
|========== | 14%
|
|========== | 15%
|
|=========== | 15%
|
|=========== | 16%
|
|============ | 17%
|
|============ | 18%
|
|============= | 18%
|
|============= | 19%
|
|============== | 19%
|
|============== | 20%
|
|============== | 21%
|
|=============== | 21%
|
|=============== | 22%
|
|================ | 23%
|
|================= | 24%
|
|================= | 25%
|
|================== | 25%
|
|================== | 26%
|
|=================== | 27%
|
|=================== | 28%
|
|==================== | 28%
|
|==================== | 29%
|
|===================== | 30%
|
|===================== | 31%
|
|====================== | 31%
|
|====================== | 32%
|
|======================= | 32%
|
|======================= | 33%
|
|======================== | 34%
|
|======================== | 35%
|
|========================= | 35%
|
|========================= | 36%
|
|========================== | 36%
|
|========================== | 37%
|
|=========================== | 38%
|
|=========================== | 39%
|
|============================ | 39%
|
|============================ | 40%
|
|============================ | 41%
|
|============================= | 41%
|
|============================= | 42%
|
|============================== | 42%
|
|============================== | 43%
|
|=============================== | 44%
|
|=============================== | 45%
|
|================================ | 45%
|
|================================ | 46%
|
|================================= | 47%
|
|================================= | 48%
|
|================================== | 48%
|
|================================== | 49%
|
|=================================== | 50%
|
|==================================== | 51%
|
|==================================== | 52%
|
|===================================== | 52%
|
|===================================== | 53%
|
|====================================== | 54%
|
|====================================== | 55%
|
|======================================= | 55%
|
|======================================= | 56%
|
|======================================== | 57%
|
|======================================== | 58%
|
|========================================= | 58%
|
|========================================= | 59%
|
|========================================== | 60%
|
|=========================================== | 61%
|
|=========================================== | 62%
|
|============================================ | 62%
|
|============================================ | 63%
|
|============================================ | 64%
|
|============================================= | 64%
|
|============================================= | 65%
|
|============================================== | 65%
|
|============================================== | 66%
|
|=============================================== | 67%
|
|================================================ | 68%
|
|================================================ | 69%
|
|================================================= | 69%
|
|================================================= | 70%
|
|================================================= | 71%
|
|================================================== | 71%
|
|================================================== | 72%
|
|=================================================== | 72%
|
|=================================================== | 73%
|
|==================================================== | 74%
|
|==================================================== | 75%
|
|===================================================== | 75%
|
|===================================================== | 76%
|
|====================================================== | 77%
|
|====================================================== | 78%
|
|======================================================= | 78%
|
|======================================================= | 79%
|
|======================================================== | 79%
|
|======================================================== | 80%
|
|======================================================== | 81%
|
|========================================================= | 81%
|
|========================================================= | 82%
|
|========================================================== | 83%
|
|=========================================================== | 84%
|
|=========================================================== | 85%
|
|============================================================ | 85%
|
|============================================================ | 86%
|
|============================================================= | 86%
|
|============================================================= | 87%
|
|============================================================= | 88%
|
|============================================================== | 88%
|
|============================================================== | 89%
|
|=============================================================== | 90%
|
|=============================================================== | 91%
|
|================================================================ | 91%
|
|================================================================ | 92%
|
|================================================================= | 92%
|
|================================================================= | 93%
|
|================================================================== | 94%
|
|================================================================== | 95%
|
|=================================================================== | 95%
|
|=================================================================== | 96%
|
|==================================================================== | 97%
|
|==================================================================== | 98%
|
|===================================================================== | 98%
|
|===================================================================== | 99%
|
|======================================================================| 100%
nyspop = nys %>% left_join(state_pops, by=c('state'='NAME')) %>% mutate(inc_pop = inc/estimate*100000) library(geofacet) ggplot(nyspop,aes(x=date, inc_pop)) + geom_smooth() + facet_geo(~ state, grid=us_state_grid1) + ylab('Daily incidence per 100k population') + theme_light() + ggtitle('Daily new COVID-19 cases in US', subtitle=sprintf('Updated %s',format(Sys.Date(),'%b %d, %Y')))

library(wbstats) pop_data <- wb(indicator = "SP.POP.TOTL",mrv=1) ecdc = ecdc_data() %>% ungroup() %>% mutate(location_name=gsub('_',' ',location_name)) ecdc = ecdc %>% dplyr::select(subset,count,location_name,date,iso2c,continent) %>% dplyr::filter(subset=='confirmed') %>% add_incidence_column(grouping_columns = c('iso2c','location_name')) ecdcpop = ecdc %>% dplyr::left_join(select(pop_data,value,iso2c), by=c('iso2c'='iso2c')) %>% mutate(inc_pop = inc/value*100000) library(geofacet) ggplot(ecdcpop,aes(x=date, inc_pop)) + geom_smooth() + facet_geo(~ location_name, grid=geofacet::eu_grid1) + ylab('Daily incidence per 100k population') + theme_light() + ggtitle('Daily new COVID-19 cases in US', subtitle=sprintf('Updated %s',format(Sys.Date(),'%b %d, %Y')))
